Even though there are many variables in the dataframe, here is a quick description of some of the variables collected.

# Number of listings recorded for each property type
listings1 <- count(listings, property_type, name = "count")
view(listings1)

Exploratory Data Analysis

Looking at the raw values

# Print every column with its type and the first few raw values
glimpse(listings)
Rows: 6,296
Columns: 74
$ id                                           <dbl> 2797791, 4990531, 6619374…
$ listing_url                                  <chr> "https://www.airbnb.com/r…
$ scrape_id                                    <dbl> 2.021093e+13, 2.021093e+1…
$ last_scraped                                 <date> 2021-09-29, 2021-09-28, …
$ name                                         <chr> "Beijing Great Wall Escap…
$ description                                  <chr> "A perfect escape only 2 …
$ neighborhood_overview                        <chr> "Located in a small villa…
$ picture_url                                  <chr> "https://a0.muscache.com/…
$ host_id                                      <dbl> 14311129, 25729513, 34492…
$ host_url                                     <chr> "https://www.airbnb.com/u…
$ host_name                                    <chr> "Andrew", "Joel", "乐林",…
$ host_since                                   <date> 2014-04-15, 2015-01-07, …
$ host_location                                <chr> "Beijing, Beijing, China"…
$ host_about                                   <chr> "Been living in Beijing f…
$ host_response_time                           <chr> "within a few hours", "wi…
$ host_response_rate                           <chr> "100%", "100%", "N/A", "1…
$ host_acceptance_rate                         <chr> "73%", "99%", "N/A", "100…
$ host_is_superhost                            <lgl> FALSE, FALSE, FALSE, FALS…
$ host_thumbnail_url                           <chr> "https://a0.muscache.com/…
$ host_picture_url                             <chr> "https://a0.muscache.com/…
$ host_neighbourhood                           <chr> NA, "Shichahai", NA, NA, …
$ host_listings_count                          <dbl> 1, 10, 2, 1, 3, 1, 5, 5, …
$ host_total_listings_count                    <dbl> 1, 10, 2, 1, 3, 1, 5, 5, …
$ host_verifications                           <chr> "['email', 'phone', 'revi…
$ host_has_profile_pic                         <lgl> TRUE, TRUE, TRUE, TRUE, T…
$ host_identity_verified                       <lgl> TRUE, TRUE, TRUE, TRUE, T…
$ neighbourhood                                <chr> "Beijing, China", "Beijin…
$ neighbourhood_cleansed                       <chr> "怀柔区 / Huairou", "东城…
$ neighbourhood_group_cleansed                 <lgl> NA, NA, NA, NA, NA, NA, N…
$ latitude                                     <dbl> 40.47329, 39.94193, 40.44…
$ longitude                                    <dbl> 116.5451, 116.3984, 116.0…
$ property_type                                <chr> "Entire residential home"…
$ room_type                                    <chr> "Entire home/apt", "Entir…
$ accommodates                                 <dbl> 10, 4, 15, 16, 12, 16, 12…
$ bathrooms                                    <lgl> NA, NA, NA, NA, NA, NA, N…
$ bathrooms_text                               <chr> "1 bath", "1 bath", "4 ba…
$ bedrooms                                     <dbl> 3, 1, 4, 1, 4, 5, 2, 4, 4…
$ beds                                         <dbl> 3, 2, 4, 2, 5, 9, 11, 12,…
$ amenities                                    <chr> "[\"Dishes and silverware…
$ price                                        <chr> "$1,914.00", "$1,610.00",…
$ minimum_nights                               <dbl> 1, 29, 1, 1, 1, 1, 1, 1, …
$ maximum_nights                               <dbl> 1125, 365, 1125, 1125, 11…
$ minimum_minimum_nights                       <dbl> 1, 29, 1, 1, 1, 1, 1, 1, …
$ maximum_minimum_nights                       <dbl> 1, 29, 1, 1, 1, 1, 1, 1, …
$ minimum_maximum_nights                       <dbl> 1125, 1125, 1125, 1125, 1…
$ maximum_maximum_nights                       <dbl> 1125, 1125, 1125, 1125, 1…
$ minimum_nights_avg_ntm                       <dbl> 1, 29, 1, 1, 1, 1, 1, 1, …
$ maximum_nights_avg_ntm                       <dbl> 1125, 1125, 1125, 1125, 1…
$ calendar_updated                             <lgl> NA, NA, NA, NA, NA, NA, N…
$ has_availability                             <lgl> TRUE, TRUE, TRUE, TRUE, T…
$ availability_30                              <dbl> 20, 0, 25, 30, 24, 26, 24…
$ availability_60                              <dbl> 50, 0, 29, 60, 27, 56, 54…
$ availability_90                              <dbl> 53, 0, 29, 90, 27, 86, 84…
$ availability_365                             <dbl> 234, 118, 29, 365, 298, 3…
$ calendar_last_scraped                        <date> 2021-09-29, 2021-09-28, …
$ number_of_reviews                            <dbl> 56, 20, 3, 1, 2, 14, 61, …
$ number_of_reviews_ltm                        <dbl> 3, 0, 0, 0, 1, 3, 15, 5, …
$ number_of_reviews_l30d                       <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0…
$ first_review                                 <date> 2015-05-04, 2016-12-31, …
$ last_review                                  <date> 2019-05-11, 2020-07-17, …
$ review_scores_rating                         <dbl> 4.63, 4.68, 5.00, 0.00, 5…
$ review_scores_accuracy                       <dbl> 4.72, 4.71, 5.00, NA, 5.0…
$ review_scores_cleanliness                    <dbl> 4.24, 4.82, 4.67, NA, 5.0…
$ review_scores_checkin                        <dbl> 4.89, 5.00, 5.00, NA, 5.0…
$ review_scores_communication                  <dbl> 4.92, 4.88, 5.00, NA, 5.0…
$ review_scores_location                       <dbl> 4.91, 4.88, 5.00, NA, 5.0…
$ review_scores_value                          <dbl> 4.30, 4.76, 4.33, NA, 5.0…
$ license                                      <lgl> NA, NA, NA, NA, NA, NA, N…
$ instant_bookable                             <lgl> FALSE, TRUE, FALSE, TRUE,…
$ calculated_host_listings_count               <dbl> 1, 10, 1, 1, 3, 1, 1, 2, …
$ calculated_host_listings_count_entire_homes  <dbl> 1, 6, 1, 0, 3, 1, 1, 2, 3…
$ calculated_host_listings_count_private_rooms <dbl> 0, 4, 0, 1, 0, 0, 0, 0, 0…
$ calculated_host_listings_count_shared_rooms  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ reviews_per_month                            <dbl> 0.72, 0.35, 0.11, 0.02, 0…

Summary statistics

# Per-column summary statistics (missingness, ranges, quantiles), reported
# separately for each variable type
listings %>%
skim()
Data summary
Name Piped data
Number of rows 6296
Number of columns 74
_______________________
Column type frequency:
character 23
Date 5
logical 9
numeric 37
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
listing_url 0 1.00 36 37 0 6296 0
name 0 1.00 1 185 0 6117 0
description 265 0.96 2 1000 0 5166 0
neighborhood_overview 1139 0.82 2 1000 0 3790 0
picture_url 0 1.00 63 112 0 6032 0
host_url 0 1.00 41 43 0 2665 0
host_name 0 1.00 1 41 0 2299 0
host_location 4 1.00 2 40 0 41 0
host_about 3332 0.47 1 4820 0 1063 1
host_response_time 0 1.00 3 18 0 5 0
host_response_rate 0 1.00 2 4 0 37 0
host_acceptance_rate 0 1.00 2 4 0 46 0
host_thumbnail_url 0 1.00 55 106 0 2658 0
host_picture_url 0 1.00 57 109 0 2658 0
host_neighbourhood 5938 0.06 2 25 0 28 0
host_verifications 0 1.00 2 151 0 99 0
neighbourhood 1139 0.82 14 34 0 8 0
neighbourhood_cleansed 0 1.00 3 16 0 16 0
property_type 0 1.00 3 35 0 80 0
room_type 0 1.00 11 15 0 3 0
bathrooms_text 5 1.00 6 17 0 70 0
amenities 0 1.00 27 1206 0 5199 0
price 0 1.00 6 10 0 3029 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
last_scraped 0 1.00 2021-09-28 2021-09-29 2021-09-28 2
host_since 0 1.00 2013-02-06 2021-09-19 2019-05-21 1370
calendar_last_scraped 0 1.00 2021-09-28 2021-09-29 2021-09-28 2
first_review 3203 0.49 2015-05-04 2021-09-28 2020-07-18 843
last_review 3203 0.49 2016-04-04 2021-09-28 2021-04-30 730

Variable type: logical

skim_variable n_missing complete_rate mean count
host_is_superhost 0 1 0.24 FAL: 4811, TRU: 1485
host_has_profile_pic 0 1 1.00 TRU: 6288, FAL: 8
host_identity_verified 0 1 1.00 TRU: 6279, FAL: 17
neighbourhood_group_cleansed 6296 0 NaN :
bathrooms 6296 0 NaN :
calendar_updated 6296 0 NaN :
has_availability 0 1 1.00 TRU: 6296
license 6296 0 NaN :
instant_bookable 0 1 0.65 TRU: 4111, FAL: 2185

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 4.068584e+07 9018880.17 2.797791e+06 3.513261e+07 4.309471e+07 4.883673e+07 5.245929e+07 ▁▁▂▅▇
scrape_id 0 1.00 2.021093e+13 0.00 2.021093e+13 2.021093e+13 2.021093e+13 2.021093e+13 2.021093e+13 ▁▁▇▁▁
host_id 0 1.00 2.545746e+08 106847611.68 4.984459e+06 1.828459e+08 2.631712e+08 3.492971e+08 4.236643e+08 ▂▅▆▇▇
host_listings_count 0 1.00 7.470000e+00 14.76 0.000000e+00 1.000000e+00 5.000000e+00 9.000000e+00 2.570000e+02 ▇▁▁▁▁
host_total_listings_count 0 1.00 7.470000e+00 14.76 0.000000e+00 1.000000e+00 5.000000e+00 9.000000e+00 2.570000e+02 ▇▁▁▁▁
latitude 0 1.00 4.031000e+01 0.30 3.947000e+01 4.019000e+01 4.041000e+01 4.050000e+01 4.095000e+01 ▁▂▂▇▁
longitude 0 1.00 1.164300e+02 0.47 1.154400e+02 1.160200e+02 1.164200e+02 1.167000e+02 1.175000e+02 ▂▆▇▃▃
accommodates 0 1.00 7.100000e+00 5.11 1.000000e+00 2.000000e+00 5.000000e+00 1.200000e+01 1.600000e+01 ▇▂▂▂▃
bedrooms 61 0.99 3.060000e+00 2.49 1.000000e+00 1.000000e+00 2.000000e+00 5.000000e+00 2.500000e+01 ▇▂▁▁▁
beds 19 1.00 4.310000e+00 4.33 0.000000e+00 1.000000e+00 3.000000e+00 6.000000e+00 7.100000e+01 ▇▁▁▁▁
minimum_nights 0 1.00 1.380000e+00 9.33 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.650000e+02 ▇▁▁▁▁
maximum_nights 0 1.00 8.738100e+02 418.94 1.000000e+00 3.650000e+02 1.125000e+03 1.125000e+03 1.125000e+03 ▂▂▁▁▇
minimum_minimum_nights 0 1.00 1.360000e+00 9.30 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.650000e+02 ▇▁▁▁▁
maximum_minimum_nights 0 1.00 1.540000e+00 15.67 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+03 ▇▁▁▁▁
minimum_maximum_nights 0 1.00 9.331500e+02 378.79 1.000000e+00 1.125000e+03 1.125000e+03 1.125000e+03 1.125000e+03 ▁▁▁▁▇
maximum_maximum_nights 0 1.00 9.353200e+02 377.05 1.000000e+00 1.125000e+03 1.125000e+03 1.125000e+03 1.125000e+03 ▁▁▁▁▇
minimum_nights_avg_ntm 0 1.00 1.410000e+00 9.98 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.650000e+02 ▇▁▁▁▁
maximum_nights_avg_ntm 0 1.00 9.349400e+02 376.98 1.000000e+00 1.125000e+03 1.125000e+03 1.125000e+03 1.125000e+03 ▁▁▁▁▇
availability_30 0 1.00 1.917000e+01 9.82 0.000000e+00 1.600000e+01 2.400000e+01 2.500000e+01 3.000000e+01 ▅▁▁▇▇
availability_60 0 1.00 4.524000e+01 16.59 0.000000e+00 3.500000e+01 5.300000e+01 5.500000e+01 6.000000e+01 ▁▁▂▁▇
availability_90 0 1.00 7.160000e+01 24.32 0.000000e+00 6.300000e+01 8.300000e+01 8.500000e+01 9.000000e+01 ▁▁▁▂▇
availability_365 0 1.00 2.493600e+02 126.21 0.000000e+00 1.530000e+02 3.370000e+02 3.590000e+02 3.650000e+02 ▂▂▂▁▇
number_of_reviews 0 1.00 3.300000e+00 11.44 0.000000e+00 0.000000e+00 0.000000e+00 2.000000e+00 4.600000e+02 ▇▁▁▁▁
number_of_reviews_ltm 0 1.00 1.450000e+00 4.37 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 9.700000e+01 ▇▁▁▁▁
number_of_reviews_l30d 0 1.00 1.300000e-01 0.64 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.600000e+01 ▇▁▁▁▁
review_scores_rating 3203 0.49 4.670000e+00 1.00 0.000000e+00 4.840000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_accuracy 3312 0.47 4.900000e+00 0.37 1.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_cleanliness 3312 0.47 4.870000e+00 0.39 1.000000e+00 4.920000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_checkin 3312 0.47 4.910000e+00 0.36 1.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_communication 3312 0.47 4.920000e+00 0.35 1.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_location 3312 0.47 4.860000e+00 0.38 1.000000e+00 4.890000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_value 3312 0.47 4.800000e+00 0.48 1.000000e+00 4.800000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
calculated_host_listings_count 0 1.00 5.910000e+00 6.12 1.000000e+00 1.000000e+00 4.000000e+00 8.000000e+00 3.300000e+01 ▇▂▁▁▁
calculated_host_listings_count_entire_homes 0 1.00 2.350000e+00 3.83 0.000000e+00 0.000000e+00 1.000000e+00 2.000000e+00 3.100000e+01 ▇▁▁▁▁
calculated_host_listings_count_private_rooms 0 1.00 3.490000e+00 5.33 0.000000e+00 0.000000e+00 1.000000e+00 5.000000e+00 3.300000e+01 ▇▁▁▁▁
calculated_host_listings_count_shared_rooms 0 1.00 7.000000e-02 0.64 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 9.000000e+00 ▇▁▁▁▁
reviews_per_month 3203 0.49 5.200000e-01 0.84 2.000000e-02 1.100000e-01 2.600000e-01 6.100000e-01 1.600000e+01 ▇▁▁▁▁

Data wrangling

Since price is a quantitative variable, we need to make sure it is stored as numeric data (`num`) in the dataframe rather than as text.

# Convert price from text such as "$1,914.00" into a plain number
listings <- mutate(listings, price = parse_number(price))
# Check the storage type of the converted column
typeof(listings$price)
[1] "double"
# Re-run the summary so that price is reported among the numeric variables
skim(listings)
Data summary
Name listings
Number of rows 6296
Number of columns 74
_______________________
Column type frequency:
character 22
Date 5
logical 9
numeric 38
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
listing_url 0 1.00 36 37 0 6296 0
name 0 1.00 1 185 0 6117 0
description 265 0.96 2 1000 0 5166 0
neighborhood_overview 1139 0.82 2 1000 0 3790 0
picture_url 0 1.00 63 112 0 6032 0
host_url 0 1.00 41 43 0 2665 0
host_name 0 1.00 1 41 0 2299 0
host_location 4 1.00 2 40 0 41 0
host_about 3332 0.47 1 4820 0 1063 1
host_response_time 0 1.00 3 18 0 5 0
host_response_rate 0 1.00 2 4 0 37 0
host_acceptance_rate 0 1.00 2 4 0 46 0
host_thumbnail_url 0 1.00 55 106 0 2658 0
host_picture_url 0 1.00 57 109 0 2658 0
host_neighbourhood 5938 0.06 2 25 0 28 0
host_verifications 0 1.00 2 151 0 99 0
neighbourhood 1139 0.82 14 34 0 8 0
neighbourhood_cleansed 0 1.00 3 16 0 16 0
property_type 0 1.00 3 35 0 80 0
room_type 0 1.00 11 15 0 3 0
bathrooms_text 5 1.00 6 17 0 70 0
amenities 0 1.00 27 1206 0 5199 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
last_scraped 0 1.00 2021-09-28 2021-09-29 2021-09-28 2
host_since 0 1.00 2013-02-06 2021-09-19 2019-05-21 1370
calendar_last_scraped 0 1.00 2021-09-28 2021-09-29 2021-09-28 2
first_review 3203 0.49 2015-05-04 2021-09-28 2020-07-18 843
last_review 3203 0.49 2016-04-04 2021-09-28 2021-04-30 730

Variable type: logical

skim_variable n_missing complete_rate mean count
host_is_superhost 0 1 0.24 FAL: 4811, TRU: 1485
host_has_profile_pic 0 1 1.00 TRU: 6288, FAL: 8
host_identity_verified 0 1 1.00 TRU: 6279, FAL: 17
neighbourhood_group_cleansed 6296 0 NaN :
bathrooms 6296 0 NaN :
calendar_updated 6296 0 NaN :
has_availability 0 1 1.00 TRU: 6296
license 6296 0 NaN :
instant_bookable 0 1 0.65 TRU: 4111, FAL: 2185

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 4.068584e+07 9018880.17 2.797791e+06 3.513261e+07 4.309471e+07 4.883673e+07 5.245929e+07 ▁▁▂▅▇
scrape_id 0 1.00 2.021093e+13 0.00 2.021093e+13 2.021093e+13 2.021093e+13 2.021093e+13 2.021093e+13 ▁▁▇▁▁
host_id 0 1.00 2.545746e+08 106847611.68 4.984459e+06 1.828459e+08 2.631712e+08 3.492971e+08 4.236643e+08 ▂▅▆▇▇
host_listings_count 0 1.00 7.470000e+00 14.76 0.000000e+00 1.000000e+00 5.000000e+00 9.000000e+00 2.570000e+02 ▇▁▁▁▁
host_total_listings_count 0 1.00 7.470000e+00 14.76 0.000000e+00 1.000000e+00 5.000000e+00 9.000000e+00 2.570000e+02 ▇▁▁▁▁
latitude 0 1.00 4.031000e+01 0.30 3.947000e+01 4.019000e+01 4.041000e+01 4.050000e+01 4.095000e+01 ▁▂▂▇▁
longitude 0 1.00 1.164300e+02 0.47 1.154400e+02 1.160200e+02 1.164200e+02 1.167000e+02 1.175000e+02 ▂▆▇▃▃
accommodates 0 1.00 7.100000e+00 5.11 1.000000e+00 2.000000e+00 5.000000e+00 1.200000e+01 1.600000e+01 ▇▂▂▂▃
bedrooms 61 0.99 3.060000e+00 2.49 1.000000e+00 1.000000e+00 2.000000e+00 5.000000e+00 2.500000e+01 ▇▂▁▁▁
beds 19 1.00 4.310000e+00 4.33 0.000000e+00 1.000000e+00 3.000000e+00 6.000000e+00 7.100000e+01 ▇▁▁▁▁
price 0 1.00 2.417430e+03 2676.84 5.900000e+01 6.190000e+02 1.555000e+03 3.511500e+03 6.399500e+04 ▇▁▁▁▁
minimum_nights 0 1.00 1.380000e+00 9.33 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.650000e+02 ▇▁▁▁▁
maximum_nights 0 1.00 8.738100e+02 418.94 1.000000e+00 3.650000e+02 1.125000e+03 1.125000e+03 1.125000e+03 ▂▂▁▁▇
minimum_minimum_nights 0 1.00 1.360000e+00 9.30 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.650000e+02 ▇▁▁▁▁
maximum_minimum_nights 0 1.00 1.540000e+00 15.67 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+03 ▇▁▁▁▁
minimum_maximum_nights 0 1.00 9.331500e+02 378.79 1.000000e+00 1.125000e+03 1.125000e+03 1.125000e+03 1.125000e+03 ▁▁▁▁▇
maximum_maximum_nights 0 1.00 9.353200e+02 377.05 1.000000e+00 1.125000e+03 1.125000e+03 1.125000e+03 1.125000e+03 ▁▁▁▁▇
minimum_nights_avg_ntm 0 1.00 1.410000e+00 9.98 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 3.650000e+02 ▇▁▁▁▁
maximum_nights_avg_ntm 0 1.00 9.349400e+02 376.98 1.000000e+00 1.125000e+03 1.125000e+03 1.125000e+03 1.125000e+03 ▁▁▁▁▇
availability_30 0 1.00 1.917000e+01 9.82 0.000000e+00 1.600000e+01 2.400000e+01 2.500000e+01 3.000000e+01 ▅▁▁▇▇
availability_60 0 1.00 4.524000e+01 16.59 0.000000e+00 3.500000e+01 5.300000e+01 5.500000e+01 6.000000e+01 ▁▁▂▁▇
availability_90 0 1.00 7.160000e+01 24.32 0.000000e+00 6.300000e+01 8.300000e+01 8.500000e+01 9.000000e+01 ▁▁▁▂▇
availability_365 0 1.00 2.493600e+02 126.21 0.000000e+00 1.530000e+02 3.370000e+02 3.590000e+02 3.650000e+02 ▂▂▂▁▇
number_of_reviews 0 1.00 3.300000e+00 11.44 0.000000e+00 0.000000e+00 0.000000e+00 2.000000e+00 4.600000e+02 ▇▁▁▁▁
number_of_reviews_ltm 0 1.00 1.450000e+00 4.37 0.000000e+00 0.000000e+00 0.000000e+00 1.000000e+00 9.700000e+01 ▇▁▁▁▁
number_of_reviews_l30d 0 1.00 1.300000e-01 0.64 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 1.600000e+01 ▇▁▁▁▁
review_scores_rating 3203 0.49 4.670000e+00 1.00 0.000000e+00 4.840000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_accuracy 3312 0.47 4.900000e+00 0.37 1.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_cleanliness 3312 0.47 4.870000e+00 0.39 1.000000e+00 4.920000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_checkin 3312 0.47 4.910000e+00 0.36 1.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_communication 3312 0.47 4.920000e+00 0.35 1.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_location 3312 0.47 4.860000e+00 0.38 1.000000e+00 4.890000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
review_scores_value 3312 0.47 4.800000e+00 0.48 1.000000e+00 4.800000e+00 5.000000e+00 5.000000e+00 5.000000e+00 ▁▁▁▁▇
calculated_host_listings_count 0 1.00 5.910000e+00 6.12 1.000000e+00 1.000000e+00 4.000000e+00 8.000000e+00 3.300000e+01 ▇▂▁▁▁
calculated_host_listings_count_entire_homes 0 1.00 2.350000e+00 3.83 0.000000e+00 0.000000e+00 1.000000e+00 2.000000e+00 3.100000e+01 ▇▁▁▁▁
calculated_host_listings_count_private_rooms 0 1.00 3.490000e+00 5.33 0.000000e+00 0.000000e+00 1.000000e+00 5.000000e+00 3.300000e+01 ▇▁▁▁▁
calculated_host_listings_count_shared_rooms 0 1.00 7.000000e-02 0.64 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 9.000000e+00 ▇▁▁▁▁
reviews_per_month 3203 0.49 5.200000e-01 0.84 2.000000e-02 1.100000e-01 2.600000e-01 6.100000e-01 1.600000e+01 ▇▁▁▁▁

Data visualisations

# Price (per bedroom) distribution by room type
# Rows without a room type or a bedroom count are dropped up front: dividing
# by a missing bedroom count gives NA, which the histogram cannot bin.
listings %>%
  filter(!is.na(room_type), !is.na(bedrooms)) %>%
  mutate(price_per_bedroom = price / bedrooms) %>%
  ggplot(aes(x = price_per_bedroom, colour = room_type)) +
  # alpha is a fixed value, so it is set on the layer instead of being mapped
  # inside aes() (mapping it would add a meaningless alpha legend)
  geom_histogram(alpha = 0.4) +
  # every `+` must end its line; otherwise the chain stops here and the theme
  # and title below are evaluated (and printed) as a separate object
  facet_wrap(~room_type, scales = "free") +
  theme_bw() +
  labs(title = "Price Distribution by Room Type")
List of 93
 $ line                      :List of 6
  ..$ colour       : chr "black"
  ..$ size         : num 0.5
  ..$ linetype     : num 1
  ..$ lineend      : chr "butt"
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ rect                      :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : chr "black"
  ..$ size         : num 0.5
  ..$ linetype     : num 1
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ text                      :List of 11
  ..$ family       : chr ""
  ..$ face         : chr "plain"
  ..$ colour       : chr "black"
  ..$ size         : num 11
  ..$ hjust        : num 0.5
  ..$ vjust        : num 0.5
  ..$ angle        : num 0
  ..$ lineheight   : num 0.9
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ title                     : chr "Price Distribution by Room Type"
 $ aspect.ratio              : NULL
 $ axis.title                : NULL
 $ axis.title.x              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 2.75points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.x.top          :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 2.75points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.x.bottom       : NULL
 $ axis.title.y              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : num 90
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.75points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.title.y.left         : NULL
 $ axis.title.y.right        :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : num -90
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.75points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text                 :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : chr "grey30"
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 2.2points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x.top           :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : num 0
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 2.2points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.x.bottom        : NULL
 $ axis.text.y               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 1
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.text.y.left          : NULL
 $ axis.text.y.right         :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.2points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ axis.ticks                :List of 6
  ..$ colour       : chr "grey20"
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ lineend      : NULL
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ axis.ticks.x              : NULL
 $ axis.ticks.x.top          : NULL
 $ axis.ticks.x.bottom       : NULL
 $ axis.ticks.y              : NULL
 $ axis.ticks.y.left         : NULL
 $ axis.ticks.y.right        : NULL
 $ axis.ticks.length         : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ axis.ticks.length.x       : NULL
 $ axis.ticks.length.x.top   : NULL
 $ axis.ticks.length.x.bottom: NULL
 $ axis.ticks.length.y       : NULL
 $ axis.ticks.length.y.left  : NULL
 $ axis.ticks.length.y.right : NULL
 $ axis.line                 : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ axis.line.x               : NULL
 $ axis.line.x.top           : NULL
 $ axis.line.x.bottom        : NULL
 $ axis.line.y               : NULL
 $ axis.line.y.left          : NULL
 $ axis.line.y.right         : NULL
 $ legend.background         :List of 5
  ..$ fill         : NULL
  ..$ colour       : logi NA
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ legend.margin             : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
  ..- attr(*, "unit")= int 8
 $ legend.spacing            : 'simpleUnit' num 11points
  ..- attr(*, "unit")= int 8
 $ legend.spacing.x          : NULL
 $ legend.spacing.y          : NULL
 $ legend.key                :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : logi NA
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ legend.key.size           : 'simpleUnit' num 1.2lines
  ..- attr(*, "unit")= int 3
 $ legend.key.height         : NULL
 $ legend.key.width          : NULL
 $ legend.text               :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.text.align         : NULL
 $ legend.title              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ legend.title.align        : NULL
 $ legend.position           : chr "right"
 $ legend.direction          : NULL
 $ legend.justification      : chr "center"
 $ legend.box                : NULL
 $ legend.box.just           : NULL
 $ legend.box.margin         : 'margin' num [1:4] 0cm 0cm 0cm 0cm
  ..- attr(*, "unit")= int 1
 $ legend.box.background     : list()
  ..- attr(*, "class")= chr [1:2] "element_blank" "element"
 $ legend.box.spacing        : 'simpleUnit' num 11points
  ..- attr(*, "unit")= int 8
 $ panel.background          :List of 5
  ..$ fill         : chr "white"
  ..$ colour       : logi NA
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ panel.border              :List of 5
  ..$ fill         : logi NA
  ..$ colour       : chr "grey20"
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ panel.spacing             : 'simpleUnit' num 5.5points
  ..- attr(*, "unit")= int 8
 $ panel.spacing.x           : NULL
 $ panel.spacing.y           : NULL
 $ panel.grid                :List of 6
  ..$ colour       : chr "grey92"
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ lineend      : NULL
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ panel.grid.major          : NULL
 $ panel.grid.minor          :List of 6
  ..$ colour       : NULL
  ..$ size         : 'rel' num 0.5
  ..$ linetype     : NULL
  ..$ lineend      : NULL
  ..$ arrow        : logi FALSE
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_line" "element"
 $ panel.grid.major.x        : NULL
 $ panel.grid.major.y        : NULL
 $ panel.grid.minor.x        : NULL
 $ panel.grid.minor.y        : NULL
 $ panel.ontop               : logi FALSE
 $ plot.background           :List of 5
  ..$ fill         : NULL
  ..$ colour       : chr "white"
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ plot.title                :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 1.2
  ..$ hjust        : num 0
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.title.position       : chr "panel"
 $ plot.subtitle             :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : num 0
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 0points 0points 5.5points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.caption              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : num 1
  ..$ vjust        : num 1
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 5.5points 0points 0points 0points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.caption.position     : chr "panel"
 $ plot.tag                  :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : 'rel' num 1.2
  ..$ hjust        : num 0.5
  ..$ vjust        : num 0.5
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ plot.tag.position         : chr "topleft"
 $ plot.margin               : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
  ..- attr(*, "unit")= int 8
 $ strip.background          :List of 5
  ..$ fill         : chr "grey85"
  ..$ colour       : chr "grey20"
  ..$ size         : NULL
  ..$ linetype     : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_rect" "element"
 $ strip.background.x        : NULL
 $ strip.background.y        : NULL
 $ strip.placement           : chr "inside"
 $ strip.text                :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : chr "grey10"
  ..$ size         : 'rel' num 0.8
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : NULL
  ..$ lineheight   : NULL
  ..$ margin       : 'margin' num [1:4] 4.4points 4.4points 4.4points 4.4points
  .. ..- attr(*, "unit")= int 8
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ strip.text.x              : NULL
 $ strip.text.y              :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : num -90
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 $ strip.switch.pad.grid     : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ strip.switch.pad.wrap     : 'simpleUnit' num 2.75points
  ..- attr(*, "unit")= int 8
 $ strip.text.y.left         :List of 11
  ..$ family       : NULL
  ..$ face         : NULL
  ..$ colour       : NULL
  ..$ size         : NULL
  ..$ hjust        : NULL
  ..$ vjust        : NULL
  ..$ angle        : num 90
  ..$ lineheight   : NULL
  ..$ margin       : NULL
  ..$ debug        : NULL
  ..$ inherit.blank: logi TRUE
  ..- attr(*, "class")= chr [1:2] "element_text" "element"
 - attr(*, "class")= chr [1:2] "theme" "gg"
 - attr(*, "complete")= logi TRUE
 - attr(*, "validate")= logi TRUE
# Box plot of price per bedroom for each neighbourhood, dropping rows whose
# neighbourhood is missing.
listings %>%
  filter(!is.na(neighbourhood_cleansed)) %>%
  mutate(price_per_bedroom = price / bedrooms) %>%
  ggplot(aes(x = factor(neighbourhood_cleansed), y = price_per_bedroom)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Zoom with coord_cartesian() instead of setting scale limits: scale limits
  # turn out-of-range values into NA *before* the box statistics are
  # computed, which distorts the medians and quartiles that are drawn.
  coord_cartesian(ylim = c(0, 2500)) +
  labs(title = "Box Plot of Price per Bedroom by Neighbourhoods")

# Store the natural log of price in a new column, then draw a pairs plot of
# it together with a selection of other listing variables.
listings <- mutate(listings, log_price = log(price))
ggpairs(
  listings,
  columns = c(
    "log_price", "accommodates", "bedrooms",
    "availability_30", "availability_60", "review_scores_rating",
    "beds", "number_of_reviews", "minimum_nights"
  )
)

# Box plot of price per bedroom split by host_is_superhost, dropping rows
# where that flag is missing.
listings %>%
  filter(!is.na(host_is_superhost)) %>%
  mutate(price_per_bedroom = price / bedrooms) %>%
  ggplot(aes(x = factor(host_is_superhost), y = price_per_bedroom)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Zoom with coord_cartesian() instead of setting scale limits: scale limits
  # turn out-of-range values into NA *before* the box statistics are
  # computed, which distorts the medians and quartiles that are drawn.
  coord_cartesian(ylim = c(0, 2500)) +
  labs(title = "Box Plot of Price per Bedroom by the host")

Property types

# Collapse property_type into five levels: the four listed types keep their
# own name and every other value becomes "Other".
listings <- mutate(
  listings,
  prop_type_simplified = if_else(
    property_type %in% c(
      "Entire villa", "Entire residential home",
      "Farm stay", "Private room in farm stay"
    ),
    property_type,
    "Other"
  )
)

Use the code below to check that prop_type_simplified was correctly made.

# Cross-tabulate the original and simplified property types, largest
# combinations first.
count(listings, property_type, prop_type_simplified, sort = TRUE)
| property_type | prop_type_simplified | n |
|---|---|---|
| Entire villa | Entire villa | 812 |
| Entire residential home | Entire residential home | 620 |
| Farm stay | Farm stay | 618 |
| Private room in farm stay | Private room in farm stay | 556 |
| Entire cottage | Other | 516 |
| Private room in kezhan | Other | 375 |
| Private room in villa | Other | 290 |
| Room in boutique hotel | Other | 279 |
| Private room in residential home | Other | 273 |
| Room in hotel | Other | 233 |
| Entire bungalow | Other | 217 |
| Private room in cottage | Other | 207 |
| Entire rental unit | Other | 156 |
| Entire townhouse | Other | 135 |
| Private room in bed and breakfast | Other | 118 |
| Private room in serviced apartment | Other | 84 |
| Private room in resort | Other | 80 |
| Private room in bungalow | Other | 68 |
| Entire loft | Other | 61 |
| Kezhan | Other | 60 |
| Private room in nature lodge | Other | 53 |
| Private room in townhouse | Other | 46 |
| Entire cabin | Other | 40 |
| Private room in rental unit | Other | 40 |
| Shared room in hostel | Other | 32 |
| Entire condominium (condo) | Other | 28 |
| Entire serviced apartment | Other | 28 |
| Private room in hostel | Other | 26 |
| Private room | Other | 23 |
| Earth house | Other | 22 |
| Room in aparthotel | Other | 17 |
| Private room in loft | Other | 15 |
| Private room in guesthouse | Other | 14 |
| Entire place | Other | 11 |
| Entire chalet | Other | 10 |
| Campsite | Other | 9 |
| Private room in earth house | Other | 7 |
| Entire bed and breakfast | Other | 6 |
| Entire home/apt | Other | 6 |
| Private room in cabin | Other | 6 |
| Private room in cave | Other | 5 |
| Private room in guest suite | Other | 5 |
| Private room in minsu | Other | 5 |
| Barn | Other | 4 |
| Entire guest suite | Other | 4 |
| Private room in barn | Other | 4 |
| Private room in castle | Other | 4 |
| Ranch | Other | 4 |
| Shared room in bed and breakfast | Other | 4 |
| Shared room in cottage | Other | 4 |
| Shared room in farm stay | Other | 4 |
| Shared room in kezhan | Other | 4 |
| Casa particular | Other | 3 |
| Minsu | Other | 3 |
| Private room in condominium (condo) | Other | 3 |
| Shared room in boutique hotel | Other | 3 |
| Tiny house | Other | 3 |
| Castle | Other | 2 |
| Entire guesthouse | Other | 2 |
| Entire resort | Other | 2 |
| Hut | Other | 2 |
| Private room in hut | Other | 2 |
| Private room in ranch | Other | 2 |
| Private room in tiny house | Other | 2 |
| Private room in treehouse | Other | 2 |
| Shared room in rental unit | Other | 2 |
| Shared room in villa | Other | 2 |
| Cave | Other | 1 |
| Entire hostel | Other | 1 |
| Holiday park | Other | 1 |
| Houseboat | Other | 1 |
| Pension | Other | 1 |
| Private room in camper/rv | Other | 1 |
| Private room in dome house | Other | 1 |
| Private room in ryokan | Other | 1 |
| Private room in shipping container | Other | 1 |
| Riad | Other | 1 |
| Shared room in earth house | Other | 1 |
| Shared room in townhouse | Other | 1 |
| Treehouse | Other | 1 |

Airbnb is most commonly used for travel purposes, i.e., as an alternative to traditional hotels. We only want to include listings in our regression analysis that are intended for travel purposes:

# Tabulate how many listings there are for each minimum_nights value.
count(listings, minimum_nights)

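| minimum_nights | n |
|---|---|
| 1 | 6197 |
| 2 | 44 |
| 3 | 8 |
| 4 | 1 |
| 5 | 2 |
| 7 | 5 |
| 10 | 8 |
| 15 | 2 |
| 29 | 7 |
| 30 | 18 |
| 360 | 1 |
| 365 | 3 |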
The most common value for minimum_nights is 1.

There are some unusual values for minimum_nights, such as 29, 30, 360 and 365. Such long minimum stays are likely set to encourage customers to stay longer and spend more money.

# Keep only the listings whose minimum stay is at most four nights.
listings <- filter(listings, minimum_nights <= 4)

Mapping

# Interactive map with one small circle marker per listing (minimum stay of
# at most four nights); the hover label is the property type and the popup
# is the listing URL.
listings %>%
  filter(minimum_nights <= 4) %>%
  leaflet() %>%
  addProviderTiles("OpenStreetMap.Mapnik") %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    radius = 1,
    fillColor = "blue",
    fillOpacity = 0.4,
    popup = ~listing_url,
    label = ~property_type
  )